# Replication Code for Plots in:
# Do Survey Experiments Capture Real-World Behavior? 
# External Validation of Conjoint and Vignette Analyses with a Natural Experiment
# Jens Hainmueller, Dominik Hangartner, Teppei Yamamoto

rm(list=ls())
library(foreign)
library("ggplot2")

# Figure 1, Figure S9, Figure S10

# Prepare one table of estimates for the attribute-level coefficient plots
# (variant with eight origin categories).
#
# Args:
#   d: two-column data.frame (point estimate, standard error) whose row names
#      identify the attribute level, e.g. "1b.gender" or "3.origin".
#      Rows with any other name are dropped.
#
# Returns:
#   data.frame with columns pe, se, var, order, upper, lower, group:
#   one row per recognised attribute level plus header / spacer rows
#   (group = NA, pe = se = upper = lower = 1), sorted by `order`.
#   `var` is a factor whose levels are in reverse row order.
#
# Stops with an error when no row name is recognised.
prepdata2 <- function(d) {

  # prep estimates: the row names become the attribute-level key
  d$var <- rownames(d)
  colnames(d) <- c("pe", "se", "var")
  d$order <- seq_len(nrow(d))
  # compute CIs (estimate +/- 1.96 standard errors)
  d$upper <- d$pe + 1.96 * d$se
  d$lower <- d$pe - 1.96 * d$se

  # one entry per attribute: row-name suffix, level codes, group name and
  # the display labels, which are assigned by position within the group
  attrs <- list(
    list(suffix = ".gender", codes = c("1b", 2), group = "Gender",
         labels = c("Female", "Male")),
    list(suffix = ".origin", codes = c("1b", 2:8), group = "Origin",
         labels = c("Netherlands", "Germany", "Austria", "Italy", "Turkey",
                    "Bosnia-Herzegovina", "Croatia", "form. Yugoslavia")),
    list(suffix = ".age", codes = c("1b", 2:4), group = "Age",
         labels = c("21 Years Old", "30 Years Old", "41 Years Old",
                    "55 Years Old")),
    list(suffix = ".ysince", codes = c("1b", 2:4),
         group = "Years Since Arrival",
         labels = c("14 Years", "20 Years", "29 Years", "Born in CH")),
    list(suffix = ".educ", codes = c("1b", 2:3), group = "Education",
         labels = c("Low", "Middle", "High")),
    list(suffix = ".integ", codes = c("1b", 2:4),
         group = "Integration Status",
         labels = c("Traditions", "Assimilated", "Indistinguishable",
                    "Integrated")),
    list(suffix = ".lang", codes = c("1b", 2:3),
         group = "German Proficiency",
         labels = c("Adequate", "Good", "Perfect"))
  )

  # define group
  d$group <- NA
  for (a in attrs) {
    d$group[d$var %in% paste0(a$codes, a$suffix)] <- a$group
  }
  d <- d[!is.na(d$group), ]
  if (nrow(d) == 0L) {
    stop("prepdata2(): no recognised attribute rows in the input",
         call. = FALSE)
  }

  # order: groups in order of first appearance, rows keep their order within
  # a group
  d <- d[order(match(d$group, unique(d$group))), ]
  d$order <- seq_len(nrow(d))

  # label attributes (indented so they sit below the group headers)
  offset <- c("   ")
  for (a in attrs) {
    d$var[d$group == a$group] <- paste(offset, a$labels)
  }

  # sub in group labels: a header row before each group and a blank spacer
  # row after it; every spacer is a different run of blanks so that all
  # values of `var` stay distinct
  spacer <- function(n) paste(rep(" ", n), collapse = "")
  dd <- data.frame(
    var = c("Gender:", spacer(1),
            "Origin:", spacer(2),
            "Age:", spacer(3),
            "Years Since Arrival:", spacer(4),
            "Education:", spacer(12),
            "Integration Status:", spacer(13),
            "German Proficiency:"),
    # fractional positions slot the rows between the integer positions of
    # the estimates; everything after Origin is shifted by 4 because Origin
    # occupies eight rows here
    order = c(.5, 2.1, 2.5,
              c(6.1, 6.5, 10.1, 10.5, 14.1, 14.5,
                17.1, 17.5, 21.1, 21.5) + 4),
    pe = 1, se = 1, upper = 1, lower = 1, group = NA)
  d <- rbind(d, dd)
  d <- d[order(d$order), ]
  # reversed levels: the first row gets the last factor level
  d$var <- factor(d$var, levels = rev(unique(d$var)))
  d
}

# Theme for the coefficient figures: theme_grey() with black 13-pt tick
# labels, grey tick marks, a rotated y-axis title and no legend.
theme_bw1 <- function(base_size = 16, base_family = "") {
  tick_text_x <- element_text(size = 13, colour = "black", hjust = .5, vjust = 1)
  # hjust = 0 left-aligns the tick labels on the y axis
  tick_text_y <- element_text(size = 13, colour = "black", hjust = 0, vjust = .5)
  title_y <- element_text(size = base_size, angle = 90, vjust = .01, hjust = .1)

  overrides <- theme(
    axis.text.x = tick_text_x,
    axis.text.y = tick_text_y,
    axis.ticks = element_line(colour = "grey50"),
    axis.title.y = title_y,
    legend.position = "none"
  )

  # %+replace% swaps each listed element wholesale instead of merging it
  theme_grey(base_size = base_size, base_family = base_family) %+replace% overrides
}

# list of subsets: one entry per output figure, consumed by the plotting
# loop below
#   subfilename  - stem of the input files (<subfilename><slevel>.txt) and
#                  of the output files
#   subsetnlabel - prefix pasted in front of every panel label
#   slevels      - file suffixes, in the order the panels are drawn
#   slabels      - panel labels, parallel to slevels
#   yylab        - title of the estimate axis
conds <- c("Behavioral \n Benchmark", "Paired Conjoint", "Paired Conjoint \n Forced Choice",
           "Paired Vignette", "Single Conjoint", "Single Vignette",
           "Paired Conjoint \n Forced Choice \n Student Sample")

# panel order used when every condition is shown
level_order <- c(1, 3, 2, 4, 5, 6, 7)

# the difference plots leave out file 1, so their labels are indexed by
# file suffix minus one
delta_order <- c(3, 2, 4, 5, 6, 7)
delta_labels <- c("Forced Conjoint", "Paired Conjoint", "Paired Vignette",
                  "Single Conjoint", "Single Vignette", "Forced C. Student")[delta_order - 1]

dl <- list(
  list(subfilename = "fig1levelwyes",
       subsetnlabel = "",
       slevels = level_order,
       slabels = conds,
       yylab = "Effect on Rejection Probability"),
  list(subfilename = "fig1levelwno",
       subsetnlabel = "",
       slevels = level_order,
       slabels = conds,
       yylab = "Effect on Rejection Probability"),
  list(subfilename = "fig1deltawyes",
       subsetnlabel = "",
       slevels = delta_order,
       slabels = delta_labels,
       yylab = "Difference: Effects in Survey - Behavioral Benchmark"),
  list(subfilename = "fig1deltawno",
       subsetnlabel = "",
       slevels = delta_order,
       slabels = delta_labels,
       yylab = "Difference: Effects in Survey - Behavioral Benchmark")
)

# do the plots: for every spec in dl, read one estimate file per panel,
# stack them, draw the faceted coefficient plot and save it as
# 1<subfilename>.pdf together with the plotted data as 1<subfilename>.csv
for (kk in seq_along(dl)) {
  spec <- dl[[kk]]

  filenames <- paste0(spec$subfilename, spec$slevels, ".txt")

  # one prepared table per file, tagged with its panel id and panel label
  alldata <- vector("list", length(filenames))
  for (i in seq_along(filenames)) {
    est <- prepdata2(read.table(filenames[i], na.strings = "."))
    est$subset      <- spec$slevels[i]
    est$subsetlabel <- paste(spec$subsetnlabel, spec$slabels[i], sep = " ")
    alldata[[i]] <- est
  }

  # pairwise left-to-right rbind; unlike a loop over 2:length(filenames)
  # this also works when a spec lists a single file
  d <- Reduce(rbind, alldata)

  # keep the panels in the order given by the spec
  d$subsetlabel <- factor(d$subsetlabel, levels = unique(d$subsetlabel))

  # rows of the first panel, which gets a gold frame
  first_panel <- subset(d, subsetlabel == levels(d$subsetlabel)[1])

  p <- ggplot(d, aes(y = pe, x = var, colour = group)) +
    facet_grid(. ~ subsetlabel) +
    coord_flip(ylim = c(-.33, .33)) +
    geom_hline(yintercept = 0, size = .5, colour = "darkgrey", linetype = "solid") +
    geom_pointrange(aes(ymin = lower, ymax = upper, width = .5), position = "dodge", size = .7) +
    scale_y_continuous(name = paste(spec$yylab), breaks = seq(-.2, .2, .2), labels = c("-.2", "0", ".2")) +
    scale_x_discrete(name = "") +
    scale_fill_brewer(palette = "Pastel1") +
    geom_rect(data = first_panel, aes(fill = subsetlabel), xmin = -Inf, xmax = Inf,
              ymin = -.32, ymax = .33, alpha = 0, size = 2, colour = "gold") +
    theme_bw1()

  # show on the current device, then close it before opening the pdf
  print(p)
  dev.off()

  pdf(paste0("1", spec$subfilename, ".pdf"), width = 14, height = 10)
  print(p)
  dev.off()

  write.csv(d[, c("pe", "se", "var", "upper", "lower", "group", "subset", "subsetlabel")],
            file = paste0("1", spec$subfilename, ".csv"))
}

# Figure S11
# Aggregated Origin Groups

# Prepare one table of estimates for the coefficient plot with aggregated
# origin groups (four origin categories).
#
# Args:
#   d: two-column data.frame (point estimate, standard error) whose row names
#      identify the attribute level, e.g. "1b.gender" or "3.originR".
#      Rows with any other name are dropped.
#
# Returns:
#   data.frame with columns pe, se, var, order, upper, lower, group:
#   one row per recognised attribute level plus header / spacer rows
#   (group = NA, pe = se = upper = lower = 1), sorted by `order`.
#   `var` is a factor whose levels are in reverse row order.
#
# Stops with an error when no row name is recognised.
prepdata <- function(d) {

  # prep estimates: the row names become the attribute-level key
  d$var <- rownames(d)
  colnames(d) <- c("pe", "se", "var")
  d$order <- seq_len(nrow(d))
  # compute CIs (estimate +/- 1.96 standard errors)
  d$upper <- d$pe + 1.96 * d$se
  d$lower <- d$pe - 1.96 * d$se

  # one entry per attribute: row-name suffix, level codes, group name and
  # the display labels, which are assigned by position within the group
  attrs <- list(
    list(suffix = ".gender", codes = c("1b", 2), group = "Gender",
         labels = c("Female", "Male")),
    list(suffix = ".originR", codes = c("1b", 2:4), group = "Origin",
         labels = c("North West", "South", "Turkey", "Yugoslavia")),
    list(suffix = ".age", codes = c("1b", 2:4), group = "Age",
         labels = c("21 Years Old", "30 Years Old", "41 Years Old",
                    "55 Years Old")),
    list(suffix = ".ysince", codes = c("1b", 2:4),
         group = "Years Since Arrival",
         labels = c("14 Years", "20 Years", "29 Years", "Born in CH")),
    list(suffix = ".educ", codes = c("1b", 2:3), group = "Education",
         labels = c("Low", "Middle", "High")),
    list(suffix = ".integ", codes = c("1b", 2:4),
         group = "Integration Status",
         labels = c("Traditions", "Assimilated", "Indistinguishable",
                    "Integrated")),
    list(suffix = ".lang", codes = c("1b", 2:3),
         group = "German Proficiency",
         labels = c("Adequate", "Good", "Perfect"))
  )

  # define group
  d$group <- NA
  for (a in attrs) {
    d$group[d$var %in% paste0(a$codes, a$suffix)] <- a$group
  }
  d <- d[!is.na(d$group), ]
  if (nrow(d) == 0L) {
    stop("prepdata(): no recognised attribute rows in the input",
         call. = FALSE)
  }

  # order: groups in order of first appearance, rows keep their order within
  # a group
  d <- d[order(match(d$group, unique(d$group))), ]
  d$order <- seq_len(nrow(d))

  # label attributes (indented so they sit below the group headers)
  offset <- c("   ")
  for (a in attrs) {
    d$var[d$group == a$group] <- paste(offset, a$labels)
  }

  # sub in group labels: a header row before each group and a blank spacer
  # row after it; every spacer is a different run of blanks so that all
  # values of `var` stay distinct
  spacer <- function(n) paste(rep(" ", n), collapse = "")
  dd <- data.frame(
    var = c("Gender:", spacer(1),
            "Origin:", spacer(2),
            "Age:", spacer(3),
            "Years Since Arrival:", spacer(4),
            "Education:", spacer(12),
            "Integration Status:", spacer(13),
            "German Proficiency:"),
    # fractional positions slot the rows between the integer positions of
    # the estimates
    order = c(.5, 2.1, 2.5, 6.1, 6.5, 10.1, 10.5, 14.1, 14.5,
              17.1, 17.5, 21.1, 21.5),
    pe = 1, se = 1, upper = 1, lower = 1, group = NA)
  d <- rbind(d, dd)
  d <- d[order(d$order), ]
  # reversed levels: the first row gets the last factor level
  d$var <- factor(d$var, levels = rev(unique(d$var)))
  d
}

# Theme for the coefficient figures: theme_grey() with black 13-pt tick
# labels, grey tick marks, a rotated y-axis title and no legend.
theme_bw1 <- function(base_size = 16, base_family = "") {
  grey_base <- theme_grey(base_size = base_size, base_family = base_family)

  tick_text_x <- element_text(size = 13, colour = "black", hjust = .5, vjust = 1)
  # hjust = 0 left-aligns the tick labels on the y axis
  tick_text_y <- element_text(size = 13, colour = "black", hjust = 0, vjust = .5)
  title_y <- element_text(size = base_size, angle = 90, vjust = .01, hjust = .1)

  # %+replace% swaps each listed element wholesale instead of merging it
  grey_base %+replace% theme(
    axis.text.x = tick_text_x,
    axis.text.y = tick_text_y,
    axis.ticks = element_line(colour = "grey50"),
    axis.title.y = title_y,
    legend.position = "none"
  )
}

# Settings for the aggregated-origin plot: axis title, panel labels and the
# single plot spec (file stem, label prefix, file suffixes in panel order,
# panel labels) consumed by the plotting loop below.
yylab <- "Effect on Rejection Probability"

conds <- c("Behavioral \n Benchmark",
           "Paired Conjoint",
           "Paired Conjoint \n Forced Choice",
           "Paired Vignette",
           "Single Conjoint",
           "Single Vignette",
           "Paired Conjoint \n Forced Choice \n Student Sample")

dl <- list(
  list(subfilename = "figS11",
       subsetnlabel = "",
       slevels = c(1, 3, 2, 4, 5, 6, 7),
       slabels = conds)
)

# do the plots: for every spec in dl, read one estimate file per panel,
# stack them, draw the faceted coefficient plot and save it as
# 1<subfilename>.pdf together with the plotted data as 1<subfilename>Agg.csv
for (kk in seq_along(dl)) {
  spec <- dl[[kk]]

  filenames <- paste0(spec$subfilename, spec$slevels, ".txt")

  # one prepared table per file, tagged with its panel id and panel label
  alldata <- vector("list", length(filenames))
  for (i in seq_along(filenames)) {
    est <- prepdata(read.table(filenames[i]))
    est$subset      <- spec$slevels[i]
    est$subsetlabel <- paste(spec$subsetnlabel, spec$slabels[i], sep = " ")
    alldata[[i]] <- est
  }

  # pairwise left-to-right rbind; unlike a loop over 2:length(filenames)
  # this also works when a spec lists a single file
  d <- Reduce(rbind, alldata)

  # keep the panels in the order given by the spec
  d$subsetlabel <- factor(d$subsetlabel, levels = unique(d$subsetlabel))

  # rows of the first panel, which gets a gold frame
  first_panel <- subset(d, subsetlabel == levels(d$subsetlabel)[1])

  p <- ggplot(d, aes(y = pe, x = var, colour = group)) +
    facet_grid(. ~ subsetlabel) +
    coord_flip(ylim = c(-.33, .33)) +
    geom_hline(yintercept = 0, size = .5, colour = "darkgrey", linetype = "solid") +
    geom_pointrange(aes(ymin = lower, ymax = upper, width = .5), position = "dodge", size = .7) +
    scale_y_continuous(name = yylab, breaks = seq(-.2, .2, .2), labels = c("-.2", "0", ".2")) +
    scale_x_discrete(name = "") +
    scale_fill_brewer(palette = "Pastel1") +
    geom_rect(data = first_panel, aes(fill = subsetlabel), xmin = -Inf, xmax = Inf,
              ymin = -.32, ymax = .33, alpha = 0, size = 2, colour = "gold") +
    theme_bw1()

  # show on the current device, then close it before opening the pdf
  print(p)
  dev.off()

  pdf(paste0("1", spec$subfilename, ".pdf"), width = 14, height = 10)
  print(p)
  dev.off()

  write.csv(d[, c("pe", "se", "var", "upper", "lower", "group", "subset", "subsetlabel")],
            file = paste0("1", spec$subfilename, "Agg.csv"))
}


# Figure 2: point estimates with 95% intervals for the four designs,
# written to fig2.pdf
rm(list = ls())

titlelabs.s <- c("Paired\nConjoint", "Paired\nVignette", "Single\nConjoint", "Single\nVignette")

# fig2.txt supplies the columns pe (estimate) and se (standard error)
d <- read.table("fig2.txt")
d$lo <- d$pe - 1.96 * d$se
d$hi <- d$pe + 1.96 * d$se

pdf("fig2.pdf", width = 5, height = 7)
# empty frame; the x axis is drawn by hand below
plot(x = 0, y = 0, type = "n", xaxt = "n",
     xlim = c(.5, 4.5), ylim = c(45, 80),
     xlab = "", ylab = "% of respondents that accept all applicants")
# NOTE(review): these reference lines sit at y = 0..4, outside ylim = c(45, 80),
# so none of them is visible - confirm whether that is intended
abline(h = 0:4, col = "gray")
axis(side = 1, at = seq_along(titlelabs.s), labels = titlelabs.s, padj = 1)
points(x = seq_along(titlelabs.s), y = d$pe, pch = 19)
# angle = 90 with code = 3 draws flat caps at both ends of each interval
arrows(x0 = seq_along(titlelabs.s), y0 = d$lo,
       x1 = seq_along(titlelabs.s), y1 = d$hi,
       angle = 90, code = 3)
dev.off()


# Figure S12: point estimates with 95% intervals for the five designs,
# written to figS12.pdf
titlelabs.s <- c("Paired\nConjoint", "Forced\nChoice", "Paired\nVignette", "Single\nConjoint", "Single\nVignette")

# S12.txt supplies the columns pe (estimate) and se (standard error)
d <- read.table("S12.txt")
d$lo <- d$pe - 1.96 * d$se
d$hi <- d$pe + 1.96 * d$se

pdf("figS12.pdf", width = 8, height = 6)
# empty frame; the x axis is drawn by hand below
plot(x = 0, y = 0, type = "n", xaxt = "n",
     xlim = c(.5, 5.5), ylim = c(0, 3),
     xlab = "", ylab = "Survey was too long")
abline(h = 0:4, col = "gray")
axis(side = 1, at = seq_along(titlelabs.s), labels = titlelabs.s, padj = 1)
points(x = seq_along(titlelabs.s), y = d$pe, pch = 19)
# angle = 90 with code = 3 draws flat caps at both ends of each interval
arrows(x0 = seq_along(titlelabs.s), y0 = d$lo,
       x1 = seq_along(titlelabs.s), y1 = d$hi,
       angle = 90, code = 3)
dev.off()

# Figure S13: point estimates with 95% intervals for the five designs,
# written to figS13.pdf; the axis labels reuse titlelabs.s as defined in
# the Figure S12 section
d <- read.table("S13.txt")
d$lo <- d$pe - 1.96 * d$se
d$hi <- d$pe + 1.96 * d$se

pdf("figS13.pdf", width = 8, height = 6)
# empty frame; the x axis is drawn by hand below
plot(x = 0, y = 0, type = "n", xaxt = "n",
     xlim = c(.5, 5.5), ylim = c(0, 3),
     xlab = "", ylab = "Survey was complicated")
abline(h = 0:4, col = "gray")
axis(side = 1, at = 1:5, labels = titlelabs.s, padj = 1)
points(x = 1:5, y = d$pe, pch = 19)
# angle = 90 with code = 3 draws flat caps at both ends of each interval
arrows(x0 = 1:5, y0 = d$lo, x1 = 1:5, y1 = d$hi, angle = 90, code = 3)
dev.off()


 

